import pandas as pd
import numpy as np

# Step 1: take a first look at what the data set actually contains.
# (The bare string below is the original section banner; it has no runtime effect.)
"""
1. 查看数据集具体有些什么
"""
# 1. Inspect X_train.txt
f_features = "dataset_uci Raw/features.txt"
f_y_train = "dataset_uci Raw/train/y_train.txt"
f_X_train = "dataset_uci Raw/train/X_train.txt"


def _read_space_separated(path):
    """Parse a headerless, space-delimited text file into a DataFrame.

    Spaces that follow a delimiter are skipped (``skipinitialspace=True``),
    and because ``header=None`` the columns get default integer labels.
    """
    return pd.read_csv(path, sep=' ', skipinitialspace=True, header=None)


# Feature names are taken from the column labelled 1 of features.txt.
feature_table = _read_space_separated(f_features)
features = feature_table[1].tolist()

# Data labels are taken from the column labelled 0 of y_train.txt.
label_table = _read_space_separated(f_y_train)
labels = label_table[0].tolist()

# Load the training matrix, name its columns after the features, and
# append the labels as one extra column called 'label'.
df_xtrain = _read_space_separated(f_X_train)
df_xtrain.columns = features
df_xtrain['label'] = labels

# Save the combined table as an Excel workbook, without the index column.
df_xtrain.to_excel("dataset_uci Raw/train/X_train.xlsx", index=False)

# Print a quick overview: first rows, (rows, columns) shape, column labels.
for summary in (df_xtrain.head(), df_xtrain.shape, df_xtrain.columns):
    print(summary)


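# Disabled debugging aid: prints every character of the first line of
# X_train.txt together with its code point (ord), then stops after that line.
# fr = open(f_X_train, "r")
# for line in fr.readlines():
#     for char in line:
#         print(char, ord(char))
#     break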
